library(tidyverse)
library(magrittr)
library(data.table)
library(lubridate)
library(foreign)
library(hms)
library(doBy)
library(ggplot2)

#Clear
cat("\014")  
rm(list=ls())

#Set Directory
setwd("replication")

#For Map Figure - which DMA's in our final sample
all_dd_data <- readRDS("data/all_dd_data.rds")
DMAs_in_sample<-unique(all_dd_data$dma_code)
write.csv(DMAs_in_sample,"data/DMAs_in_sample.csv")


# by ad sponsor type
load("data/dma_timezone.RData")
dma_timezone <- dma_timezone %>% 
  select(dma_code, timezone) %>%
  mutate(timezone = recode(timezone, ETZ = "America/New_York", CTZ = "America/Chicago", PTZ="America/Los_Angeles")) %>%
  filter(!is.na(dma_code))


### THESE DATA ARE PROPRIETARY (NIELSEN ADS)
load("data/sengov_ads_2010_2014.RData")
names(sengov.ads)[18] <- "grp_p18"
names(sengov.ads)[19] <- "imp_p18"
names(sengov.ads)[20] <- "cpm_p18"

load("data/congressional_ads_2010_2014.RData")

load("data/pac_ads_2010_2014.RData")
names(group.ads)[18] <- "grp_p18"
names(group.ads)[19] <- "imp_p18"
names(group.ads)[20] <- "cpm_p18"

load("data/pres_ads_2012.RData")
names(pres.ads)[18] <- "grp_p18"
names(pres.ads)[19] <- "imp_p18"
names(pres.ads)[20] <- "cpm_p18"

ads_dma <- read_csv("data/dmacodes.csv") %>% 
  mutate(Market = str_trim(substr(Market,1,15)))

ads <- bind_rows(congress.ads %>% mutate(ad_type = "cand_house"), 
                 sengov.ads %>% mutate(ad_type = case_when(office.x %in% c("gov", "ltgov", "oth") ~ "cand_statewide", office.x %in% c("sen", "sen2") ~ "cand_senate")), 
                 group.ads %>% mutate(ad_type = "outside"), 
                 pres.ads %>% mutate(ad_type = "cand_pres")) %>% 
  mutate(Market = str_trim(substr(Market,1,15))) %>%
  left_join(ads_dma) %>%
  mutate(dma_code=as.character(DMA)) %>%
  inner_join(dma_timezone) %>%  ## restricts to ads in STB markets
  mutate(
    channel = sub(" *([DKMNWX][A-Z]+) \\(([A-Z]+)\\) *", "\\1", Distributor),
    affiliate = sub(" *([DKMNWX][A-Z]+) \\(([A-Z]+)\\) *", "\\2", Distributor),
    program = str_trim(Program.Title)
  )

# select dates between 9/1 and 11/6
ads <- ads %>% 
  mutate(Date = mdy(Date), Time = as.hms(Time)) %>%
  filter(Date>=mdy("09/01/2012"), Date<=mdy("11/06/2012")) %>%
  as.data.table

#Cost of ad = cost per impression * number of impressions (viewers over 18)
ads$cost_final<-ads$imp_p18*ads$cpm_p18 

#Sum total spending per campaign in each day
collapse1 <- summaryBy(cost_final ~ Date + ad_type, data=ads, FUN=sum, na.omit=TRUE)

#In millions of dollars
collapse1$cost_final.sum<-collapse1$cost_final.sum/1000000 

#Plot graphs
p<-ggplot(collapse1, aes(x=Date, y=cost_final.sum, group=ad_type)) +
  geom_line(aes(linetype=ad_type), lwd = 1)+
 geom_point(aes(shape=ad_type),size=2)+
  scale_linetype_manual(values=c("solid", "longdash", "solid", "solid","dotted"))+ #Change Lines
  scale_shape_manual(values=c(15,NA,NA,21,NA)) +                  # Change shapes
  theme(legend.position="right")

# Change axis labels
p <- p + labs(y="Spending in Ads (US$ Millions)") +
  theme(
  plot.title = element_text(color="black", size=12, face="bold"),
  axis.title.x = element_text(color="black", size=12, face="bold"),
  axis.title.y = element_text(color="black", size=12, face="bold")
)

ggsave(p, filename = "plots/tv_spending_2012_relabeled.png")

#qplot(collapse1$Date, collapse1$cost.sum)
#cbind(data, total = rowSums(data))